library(arrow)
library(tidyverse)
library(tidytext)
library(ggwordcloud)
# Load the submissions snapshot as a tibble and treat `subreddit` as a factor.
df <- "submissions/reddit_submissions_2021-03-02_23:27:18_UTC.parquet" %>%
  arrow::read_parquet() %>%
  as_tibble() %>%
  mutate(subreddit = as_factor(subreddit))

# Per-column overview of the loaded data.
summary(df)
 submission_id         title               text                    subreddit      hot_rank    
 Length:1500        Length:1500        Length:1500        investing     :116   Min.   :  0.0  
 Class :character   Class :character   Class :character   pennystocks   :327   1st Qu.: 93.0  
 Mode  :character   Mode  :character   Mode  :character   algotrading   :498   Median :211.0  
                                                          wallstreetbets:559   Mean   :226.5  
                                                                               3rd Qu.:340.2  
                                                                               Max.   :558.0  
# Build a one-token-per-row table:
#   1. stack the `title` and `text` columns, remembering the source column
#      in `text_type`;
#   2. split each text into word tokens;
#   3. drop every token that has a match in `stop_words`.
tidytext_df <- df %>%
  pivot_longer(
    cols = c("title", "text"),
    names_to = "text_type",
    values_to = "text"
  ) %>%
  mutate(text_type = as_factor(text_type)) %>%
  unnest_tokens(output = word, input = text) %>%
  anti_join(stop_words)
Joining, by = "word"
# Per-column overview of the tokenised data.
summary(tidytext_df)
 submission_id               subreddit        hot_rank     text_type          word          
 Length:128527      investing     :22879   Min.   :  0.0   title:  8410   Length:128527     
 Class :character   pennystocks   :50652   1st Qu.: 62.0   text :120117   Class :character  
 Mode  :character   algotrading   :30723   Median :132.0                  Mode  :character  
                    wallstreetbets:24273   Mean   :169.4                                    
                                           3rd Qu.:239.0                                    
                                           Max.   :558.0                                    
# Token frequencies within the wallstreetbets subreddit, most frequent first.
tidytext_df %>%
  filter(subreddit == "wallstreetbets") %>%
  group_by(subreddit, word) %>%
  count(sort = TRUE)
# Word cloud of the 100 most frequent tokens over all subreddits;
# label size encodes the token count.
tidytext_df %>%
  count(word, sort = TRUE) %>%
  slice_head(n = 100) %>%
  ggplot(aes(label = word, size = n)) +
  geom_text_wordcloud()

# One word cloud per subreddit.
# NOTE(review): the cut-off keeps the 300 most frequent (subreddit, word)
# pairs overall, not 300 per subreddit, so panels can hold different numbers
# of words.
tidytext_df %>%
  count(subreddit, word, sort = TRUE) %>%
  slice_head(n = 300) %>%
  ggplot(aes(label = word, size = n, color = subreddit)) +
  geom_text_wordcloud() +
  facet_wrap(~subreddit)

Many tokens are numbers - let's remove all non-alphabetical characters

# Per-column overview, now including the joined sentiment columns.
summary(tidytext_df)
 submission_id               subreddit        hot_rank     text_type          word               afinn             bing           negative           fear       
 Length:128527      investing     :22879   Min.   :  0.0   title:  8410   Length:128527      Min.   :-5.00    Min.   :-1.00    Min.   :0.00     Min.   :0.00    
 Class :character   pennystocks   :50652   1st Qu.: 62.0   text :120117   Class :character   1st Qu.:-1.00    1st Qu.:-1.00    1st Qu.:0.00     1st Qu.:0.00    
 Mode  :character   algotrading   :30723   Median :132.0                  Mode  :character   Median : 1.00    Median : 1.00    Median :0.00     Median :0.00    
                    wallstreetbets:24273   Mean   :169.4                                     Mean   : 0.42    Mean   : 0.06    Mean   :0.24     Mean   :0.13    
                                           3rd Qu.:239.0                                     3rd Qu.: 2.00    3rd Qu.: 1.00    3rd Qu.:0.00     3rd Qu.:0.00    
                                           Max.   :558.0                                     Max.   : 5.00    Max.   : 1.00    Max.   :1.00     Max.   :1.00    
                                                                                             NA's   :119574   NA's   :119411   NA's   :106911   NA's   :106911  
    sadness           anger           disgust          positive          trust             joy          anticipation   
 Min.   :0.00     Min.   :0.0      Min.   :0.00     Min.   :0.00     Min.   :0.00     Min.   :0.00     Min.   :0.00    
 1st Qu.:0.00     1st Qu.:0.0      1st Qu.:0.00     1st Qu.:0.00     1st Qu.:0.00     1st Qu.:0.00     1st Qu.:0.00    
 Median :0.00     Median :0.0      Median :0.00     Median :1.00     Median :0.00     Median :0.00     Median :0.00    
 Mean   :0.09     Mean   :0.1      Mean   :0.04     Mean   :0.55     Mean   :0.33     Mean   :0.16     Mean   :0.27    
 3rd Qu.:0.00     3rd Qu.:0.0      3rd Qu.:0.00     3rd Qu.:1.00     3rd Qu.:1.00     3rd Qu.:0.00     3rd Qu.:1.00    
 Max.   :1.00     Max.   :1.0      Max.   :1.00     Max.   :1.00     Max.   :1.00     Max.   :1.00     Max.   :1.00    
 NA's   :106911   NA's   :106911   NA's   :106911   NA's   :106911   NA's   :106911   NA's   :106911   NA's   :106911  
# Mean AFINN score per subreddit; tokens whose `afinn` is NA are ignored.
tidytext_df %>%
  group_by(subreddit) %>%
  summarise(mean(afinn, na.rm = TRUE))
`summarise()` ungrouping output (override with `.groups` argument)
# Density of AFINN scores, one panel per subreddit.
ggplot(tidytext_df, aes(afinn, fill = subreddit)) +
  geom_density(alpha = 0.4) +
  facet_wrap(~subreddit)

# Box plots of AFINN scores, one box per subreddit.
ggplot(tidytext_df, aes(afinn, fill = subreddit)) +
  geom_boxplot()

# Mean AFINN score per subreddit, with error bars of +/- one standard error.
#
# The standard error divides by the number of tokens that actually have an
# AFINN score. sd(..., na.rm = TRUE) only sees those tokens, whereas n() would
# also count the unscored (NA) tokens and shrink the error bars.
tidytext_df %>%
  group_by(subreddit) %>%
  summarise(
    mean = mean(afinn, na.rm = TRUE),
    stderr = sd(afinn, na.rm = TRUE) / sqrt(sum(!is.na(afinn))),
    ymin = mean - stderr,
    ymax = mean + stderr
  ) %>%
  ggplot(aes(x = subreddit, y = mean)) +
  geom_col(fill = "navy", alpha = 0.7) +
  geom_errorbar(aes(ymin = ymin, ymax = ymax)) +
  ylim(-1, 1)
`summarise()` ungrouping output (override with `.groups` argument)

# Stacked bars: share of tokens per subreddit with a positive (bing == 1) or
# negative (bing == -1) Bing score.
#
# The denominator is every token in the subreddit, including tokens that have
# no Bing score (NA); na.rm = TRUE keeps those out of the numerators only.
tidytext_df %>%
  group_by(subreddit) %>%
  summarise(
    count_all = n(),
    portion_positive = sum(bing == 1, na.rm = TRUE) / count_all,
    portion_negative = sum(bing == -1, na.rm = TRUE) / count_all
  ) %>%
  select(subreddit, portion_positive, portion_negative) %>%
  pivot_longer(
    portion_positive:portion_negative,
    names_to = "sentiment",
    values_to = "portion"
  ) %>%
  ggplot(aes(x = subreddit, y = portion, fill = sentiment)) +
  geom_col() +
  scale_fill_manual(values = c("navy", "deeppink2")) +
  ylim(0, .2)
`summarise()` ungrouping output (override with `.groups` argument)

# Top 40 tokens by tf-idf for each subreddit, one panel per subreddit.
#
# with_ties = FALSE caps every panel at exactly 40 bars even when tf-idf
# values tie. reorder_within() + scale_y_reordered() sort the bars inside each
# panel; a plain fct_reorder() would impose one ordering shared by all panels.
tfidf_df %>%
  group_by(subreddit) %>%
  slice_max(tf_idf, n = 40, with_ties = FALSE) %>%
  ungroup() %>%
  ggplot(aes(
    tf_idf,
    reorder_within(word, tf_idf, subreddit),
    fill = subreddit
  )) +
  geom_col(show.legend = FALSE) +
  scale_y_reordered() +
  facet_wrap(~subreddit, ncol = 4, scales = "free") +
  labs(x = "tf-idf", y = NULL)

# Top 40 tokens by tf-idf for each subreddit, restricted to words whose
# `positive` flag in `nrc` equals 1.
#
# reorder_within() + scale_y_reordered() sort the bars inside each panel;
# a plain fct_reorder() would impose one ordering shared by all panels.
tfidf_df %>%
  inner_join(filter(nrc, positive == 1)) %>%
  group_by(subreddit) %>%
  slice_max(tf_idf, n = 40, with_ties = FALSE) %>%
  ungroup() %>%
  ggplot(aes(
    tf_idf,
    reorder_within(word, tf_idf, subreddit),
    fill = subreddit
  )) +
  geom_col(show.legend = FALSE) +
  scale_y_reordered() +
  facet_wrap(~subreddit, ncol = 4, scales = "free") +
  labs(x = "tf-idf", y = NULL)
Joining, by = "word"

# Top 40 tokens by tf-idf for each subreddit, restricted to words whose
# `negative` flag in `nrc` equals 1.
#
# reorder_within() + scale_y_reordered() sort the bars inside each panel;
# a plain fct_reorder() would impose one ordering shared by all panels.
tfidf_df %>%
  inner_join(filter(nrc, negative == 1)) %>%
  group_by(subreddit) %>%
  slice_max(tf_idf, n = 40, with_ties = FALSE) %>%
  ungroup() %>%
  ggplot(aes(
    tf_idf,
    reorder_within(word, tf_idf, subreddit),
    fill = subreddit
  )) +
  geom_col(show.legend = FALSE) +
  scale_y_reordered() +
  facet_wrap(~subreddit, ncol = 4, scales = "free") +
  labs(x = "tf-idf", y = NULL)
Joining, by = "word"

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KGFycm93KQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeShnZ3dvcmRjbG91ZCkKYGBgCgpgYGB7cn0KZGYgPC0gYXJyb3c6OnJlYWRfcGFycXVldCgic3VibWlzc2lvbnMvcmVkZGl0X3N1Ym1pc3Npb25zXzIwMjEtMDMtMDJfMjM6Mjc6MThfVVRDLnBhcnF1ZXQiKSAlPiUgYXNfdGliYmxlKCkgJT4lIG11dGF0ZShzdWJyZWRkaXQgPSBhc19mYWN0b3Ioc3VicmVkZGl0KSkKYGBgCgpgYGB7cn0KZGYgJT4lIHN1bW1hcnkoKQpgYGAKCgpgYGB7cn0KdGlkeXRleHRfZGYgPC0gZGYgJT4lIAogIHBpdm90X2xvbmdlcihjb2xzID0gYygndGl0bGUnLCd0ZXh0JyksIAogICAgICAgICAgICAgICBuYW1lc190byA9ICJ0ZXh0X3R5cGUiLCAKICAgICAgICAgICAgICAgdmFsdWVzX3RvID0gInRleHQiKSAlPiUgCiAgbXV0YXRlKHRleHRfdHlwZT1hc19mYWN0b3IodGV4dF90eXBlKSkgJT4lIAogIHVubmVzdF90b2tlbnMod29yZCwgdGV4dCkgJT4lIAogIGFudGlfam9pbihzdG9wX3dvcmRzKQoKdGlkeXRleHRfZGYgJT4lIHN1bW1hcnkoKQpgYGAKCgpgYGB7cn0KdGlkeXRleHRfZGYgJT4lIGZpbHRlcihzdWJyZWRkaXQ9PSJ3YWxsc3RyZWV0YmV0cyIpICU+JSBncm91cF9ieShzdWJyZWRkaXQsIHdvcmQpICU+JSBjb3VudChzb3J0PVRSVUUpIApgYGAKCmBgYHtyfQp0aWR5dGV4dF9kZiAlPiUgCiAgY291bnQod29yZCwgc29ydD1UUlVFKSAlPiUgaGVhZCgxMDApICU+JSAKICBnZ3Bsb3QoYWVzKGxhYmVsPXdvcmQsIHNpemU9bikpICsgZ2VvbV90ZXh0X3dvcmRjbG91ZCgpCmBgYAoKYGBge3J9CnRpZHl0ZXh0X2RmICU+JSAKICBjb3VudChzdWJyZWRkaXQsIHdvcmQsIHNvcnQ9VFJVRSkgJT4lIGhlYWQoMzAwKSAlPiUgCiAgZ2dwbG90KGFlcyhsYWJlbD13b3JkLCBzaXplPW4sIGNvbG9yPXN1YnJlZGRpdCkpICsgCiAgZ2VvbV90ZXh0X3dvcmRjbG91ZCgpICsgCiAgZmFjZXRfd3JhcCh2YXJzKHN1YnJlZGRpdCkpCmBgYApgYGB7cn0KdGlkeXRleHRfZGYgJT4lIAogIGNvdW50KHN1YnJlZGRpdCwgd29yZCkgJT4lIAogIGdyb3VwX2J5KHN1YnJlZGRpdCkgJT4lIAogIG11dGF0ZShwcm9wb3J0aW9uID0gbiAvIHN1bShuKSkKYGBgCgoKYGBge3IgZmlnLndpZHRoPTYsIGZpZy5oZWlnaHQ9Mn0KbGlicmFyeShzY2FsZXMpCgp0aWR5dGV4dF9kZiAlPiUgCiAgY291bnQoc3VicmVkZGl0LCB3b3JkKSAlPiUgCiAgZ3JvdXBfYnkoc3VicmVkZGl0KSAlPiUgCiAgbXV0YXRlKHByb3BvcnRpb24gPSBuIC8gc3VtKG4pKSAlPiUgCiAgc2VsZWN0KC1uKSAlPiUgCiAgc3ByZWFkKHN1YnJlZGRpdCwgcHJvcG9ydGlvbikgJT4lIAogIGdhdGhlcihzdWJyZWRkaXQsIHByb3BvcnRpb24sIGludmVzdGluZzphbGdvdHJhZGluZykgJT4lIAogIGdncGxvdChhZXMoeCA9IHByb3BvcnRpb24sIHkgPSB3YWxsc3Ry
ZWV0YmV0cywgY29sb3IgPSBhYnMod2FsbHN0cmVldGJldHMgLSBwcm9wb3J0aW9uKSkpICsKICBnZW9tX2FibGluZShjb2xvciA9ICJncmF5NDAiLCBsdHkgPSAyKSArCiAgZ2VvbV9qaXR0ZXIoYWxwaGEgPSAwLjEsIHNpemUgPSAyLjUsIHdpZHRoID0gMC4zLCBoZWlnaHQgPSAwLjMpICsKICBnZW9tX3RleHQoYWVzKGxhYmVsID0gd29yZCksIGNoZWNrX292ZXJsYXAgPSBUUlVFLCB2anVzdCA9IDEuNSkgKwogIHNjYWxlX3hfbG9nMTAobGFiZWxzID0gcGVyY2VudF9mb3JtYXQoKSkgKwogIHNjYWxlX3lfbG9nMTAobGFiZWxzID0gcGVyY2VudF9mb3JtYXQoKSkgKwogIHNjYWxlX2NvbG9yX2dyYWRpZW50KGxpbWl0cyA9IGMoMCwgMC4wMDEpLCBsb3cgPSAibmF2eSIsIGhpZ2ggPSAiZ3JheTUwIikgKwogIGZhY2V0X3dyYXAofnN1YnJlZGRpdCwgbmNvbCA9IDMpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKSArCiAgbGFicyh5ID0gIndhbGxzdHJlZXRiZXRzIiwgeCA9IE5VTEwpCmBgYAoKbWFueSBudW1iZXJzIC0gbGV0cyByZW1vdmUgYWxsIGFscGhhYmV0aWNhbCBjaGFyYWN0ZXJzCgpgYGB7ciBmaWcud2lkdGg9NiwgZmlnLmhlaWdodD0yfQp0aWR5dGV4dF9kZiAlPiUKICBtdXRhdGUod29yZCA9IHN0cl9leHRyYWN0KHdvcmQsICJbYS16J10rIikpICU+JSAKICBjb3VudChzdWJyZWRkaXQsIHdvcmQpICU+JSAKICBncm91cF9ieShzdWJyZWRkaXQpICU+JSAKICBtdXRhdGUocHJvcG9ydGlvbiA9IG4gLyBzdW0obikpICU+JSAKICBzZWxlY3QoLW4pICU+JSAKICBzcHJlYWQoc3VicmVkZGl0LCBwcm9wb3J0aW9uKSAlPiUgCiAgZ2F0aGVyKHN1YnJlZGRpdCwgcHJvcG9ydGlvbiwgaW52ZXN0aW5nOmFsZ290cmFkaW5nKSAlPiUgCiAgZ2dwbG90KGFlcyh4ID0gcHJvcG9ydGlvbiwgeSA9IHdhbGxzdHJlZXRiZXRzLCBjb2xvciA9IGFicyh3YWxsc3RyZWV0YmV0cyAtIHByb3BvcnRpb24pKSkgKwogIGdlb21fYWJsaW5lKGNvbG9yID0gImdyYXk0MCIsIGx0eSA9IDIpICsKICBnZW9tX2ppdHRlcihhbHBoYSA9IDAuMSwgc2l6ZSA9IDIuNSwgd2lkdGggPSAwLjMsIGhlaWdodCA9IDAuMykgKwogIGdlb21fdGV4dChhZXMobGFiZWwgPSB3b3JkKSwgY2hlY2tfb3ZlcmxhcCA9IFRSVUUsIHZqdXN0ID0gMS41KSArCiAgc2NhbGVfeF9sb2cxMChsYWJlbHMgPSBwZXJjZW50X2Zvcm1hdCgpKSArCiAgc2NhbGVfeV9sb2cxMChsYWJlbHMgPSBwZXJjZW50X2Zvcm1hdCgpKSArCiAgc2NhbGVfY29sb3JfZ3JhZGllbnQobGltaXRzID0gYygwLCAwLjAwMSksIGxvdyA9ICJuYXZ5IiwgaGlnaCA9ICJncmF5NTAiKSArCiAgZmFjZXRfd3JhcCh+c3VicmVkZGl0LCBuY29sID0gMykgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpICsKICBsYWJzKHkgPSAid2FsbHN0cmVldGJldHMiLCB4ID0gTlVMTCkKYGBgCgoKCgpgYGB7ciBmaWcud2lkdGg9NiwgZmlnLmhlaWdodD0yfQpsaWJyYXJ5KFNub3diYWxsQykKdGlkeXRl
eHRfZGYgICU+JQogIG11dGF0ZSh3b3JkID0gU25vd2JhbGxDOjp3b3JkU3RlbSh3b3JkLCBsYW5ndWFnZSA9ICJwb3J0ZXIiKSkgJT4lCiAgbXV0YXRlKHdvcmQgPSBzdHJfZXh0cmFjdCh3b3JkLCAiW2EteiddKyIpKSAlPiUgCiAgY291bnQoc3VicmVkZGl0LCB3b3JkKSAlPiUgCiAgZ3JvdXBfYnkoc3VicmVkZGl0KSAlPiUgCiAgbXV0YXRlKHByb3BvcnRpb24gPSBuIC8gc3VtKG4pKSAlPiUgCiAgc2VsZWN0KC1uKSAlPiUgCiAgc3ByZWFkKHN1YnJlZGRpdCwgcHJvcG9ydGlvbikgJT4lIAogIGdhdGhlcihzdWJyZWRkaXQsIHByb3BvcnRpb24sIGludmVzdGluZzphbGdvdHJhZGluZykgJT4lIAogIGdncGxvdChhZXMoeCA9IHByb3BvcnRpb24sIHkgPSB3YWxsc3RyZWV0YmV0cywgY29sb3IgPSBhYnMod2FsbHN0cmVldGJldHMgLSBwcm9wb3J0aW9uKSkpICsKICBnZW9tX2FibGluZShjb2xvciA9ICJncmF5NDAiLCBsdHkgPSAyKSArCiAgZ2VvbV9qaXR0ZXIoYWxwaGEgPSAwLjEsIHNpemUgPSAyLjUsIHdpZHRoID0gMC4zLCBoZWlnaHQgPSAwLjMpICsKICBnZW9tX3RleHQoYWVzKGxhYmVsID0gd29yZCksIGNoZWNrX292ZXJsYXAgPSBUUlVFLCB2anVzdCA9IDEuNSkgKwogIHNjYWxlX3hfbG9nMTAobGFiZWxzID0gcGVyY2VudF9mb3JtYXQoKSkgKwogIHNjYWxlX3lfbG9nMTAobGFiZWxzID0gcGVyY2VudF9mb3JtYXQoKSkgKwogIHNjYWxlX2NvbG9yX2dyYWRpZW50KGxpbWl0cyA9IGMoMCwgMC4wMDEpLCBsb3cgPSAibmF2eSIsIGhpZ2ggPSAiZ3JheTc1IikgKwogIGZhY2V0X3dyYXAofnN1YnJlZGRpdCwgbmNvbCA9IDMpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKSArCiAgbGFicyh5ID0gIndhbGxzdHJlZXRiZXRzIiwgeCA9IE5VTEwpCmBgYAoKCmBgYHtyfQphZmlubiA8LSBnZXRfc2VudGltZW50cygiYWZpbm4iKSAlPiUgcmVuYW1lKCJhZmlubiI9InZhbHVlIikKYmluZyA8LSBnZXRfc2VudGltZW50cygiYmluZyIpICU+JSBtdXRhdGUoImJpbmciPWFzLmludGVnZXIoc2VudGltZW50PT0icG9zaXRpdmUiKSoyLTEpICU+JSBzZWxlY3QoLXNlbnRpbWVudCkKbnJjIDwtIGdldF9zZW50aW1lbnRzKCJucmMiKSAlPiUgCiAgbXV0YXRlKG5lZ2F0aXZlID0gc2VudGltZW50PT0ibmVnYXRpdmUiLCAKICAgICAgICAgICAgICAgZmVhciA9IHNlbnRpbWVudD09ImZlYXIiLAogICAgICAgICAgICAgICBzYWRuZXNzID0gc2VudGltZW50PT0ic2FkbmVzcyIsCiAgICAgICAgICAgICAgIGFuZ2VyID0gc2VudGltZW50PT0iYW5nZXIiLAogICAgICAgICAgICAgICBkaXNndXN0ID0gc2VudGltZW50PT0iZGlzZ3VzdCIsCiAgICAgICAgICAgICAgIHBvc2l0aXZlID0gc2VudGltZW50PT0icG9zaXRpdmUiLAogICAgICAgICAgICAgICB0cnVzdCA9IHNlbnRpbWVudD09InRydXN0IiwKICAgICAgICAgICAgICAgam95ID0gc2VudGltZW50PT0iam95IiwKICAgICAgICAgICAgICAgYW50aWNpcGF0aW9uID0gc2VudGlt
ZW50PT0iYW50aWNpcGF0aW9uIikgJT4lIAogIGdyb3VwX2J5KHdvcmQpICU+JSBzdW1tYXJpc2VfaWYoaXNfbG9naWNhbCwgc3VtKQoKCnRpZHl0ZXh0X2RmIDwtIHRpZHl0ZXh0X2RmICU+JSAKICBsZWZ0X2pvaW4oYWZpbm4sIGJ5PSJ3b3JkIikgJT4lCiAgbGVmdF9qb2luKGJpbmcsIGJ5PSJ3b3JkIikgJT4lIAogIGxlZnRfam9pbihucmMsIGJ5PSJ3b3JkIikKCnRpZHl0ZXh0X2RmICU+JSBzdW1tYXJ5KCkKYGBgCgoKCgpgYGB7cn0KdGlkeXRleHRfZGYgJT4lIGdyb3VwX2J5KHN1YnJlZGRpdCkgJT4lIHN1bW1hcmlzZShtZWFuKGFmaW5uLCBuYS5ybSA9IFRSVUUpKQoKdGlkeXRleHRfZGYgJT4lIGdncGxvdChhZXMoeD1hZmlubiwgZmlsbD1zdWJyZWRkaXQpKSArIGdlb21fZGVuc2l0eShhbHBoYT0uNCkgKyBmYWNldF93cmFwKHZhcnMoc3VicmVkZGl0KSkKdGlkeXRleHRfZGYgJT4lIGdncGxvdChhZXMoeD1hZmlubiwgZmlsbD1zdWJyZWRkaXQpKSArIGdlb21fYm94cGxvdCgpCnRpZHl0ZXh0X2RmICU+JSAKICBncm91cF9ieShzdWJyZWRkaXQpICU+JSAKICBzdW1tYXJpc2UobWVhbj1tZWFuKGFmaW5uLCBuYS5ybSA9IFRSVUUpLCAKICAgICAgICAgICAgc3RkZXJyPXNkKGFmaW5uLCBuYS5ybSA9IFRSVUUpL3NxcnQobigpKSwgCiAgICAgICAgICAgIHltaW49bWVhbi1zdGRlcnIsIAogICAgICAgICAgICB5bWF4PW1lYW4rc3RkZXJyKSAlPiUgCiAgZ2dwbG90KGFlcyh4PXN1YnJlZGRpdCwgeT1tZWFuKSkgKyAKICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIsIGZpbGw9Im5hdnkiLCBhbHBoYT0uNykgKyBnZW9tX2Vycm9yYmFyKGFlcyh5bWluPXltaW4sIHltYXg9eW1heCkpICsgeWxpbSgtMSwgMSkKYGBgCgoKCgoKCmBgYHtyfQp0aWR5dGV4dF9kZiAlPiUgZ3JvdXBfYnkoc3VicmVkZGl0KSAlPiUgc3VtbWFyaXNlKG1lYW4oYmluZywgbmEucm0gPSBUUlVFKSkKCnRpZHl0ZXh0X2RmICU+JSBnZ3Bsb3QoYWVzKHg9YmluZywgZmlsbD1zdWJyZWRkaXQpKSArIGdlb21fZGVuc2l0eShhbHBoYT0uNCkgKyBmYWNldF93cmFwKHZhcnMoc3VicmVkZGl0KSkKdGlkeXRleHRfZGYgJT4lIAogIGdyb3VwX2J5KHN1YnJlZGRpdCkgJT4lIAogIHN1bW1hcmlzZShtZWFuPW1lYW4oYmluZywgbmEucm0gPSBUUlVFKSwgCiAgICAgICAgICAgIHN0ZGVycj1zZChiaW5nLCBuYS5ybSA9IFRSVUUpL3NxcnQobigpKSwgCiAgICAgICAgICAgIHltaW49bWVhbi1zdGRlcnIsIAogICAgICAgICAgICB5bWF4PW1lYW4rc3RkZXJyKSAlPiUgCiAgZ2dwbG90KGFlcyh4PXN1YnJlZGRpdCwgeT1tZWFuKSkgKyAKICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIsIGZpbGw9Im5hdnkiLCBhbHBoYT0uNykgKyBnZW9tX2Vycm9yYmFyKGFlcyh5bWluPXltaW4sIHltYXg9eW1heCkpICsgeWxpbSgtMSwgMSkKCnRpZHl0ZXh0X2RmICU+JSAKICBncm91cF9ieShzdWJyZWRkaXQpICU+JSAKICBzdW1tYXJpc2UobWVhbj1tZWFuKGJpbmcsIG5hLnJtID0gVFJV
RSksIAogICAgICAgICAgICBzdGRlcnI9c2QoYmluZywgbmEucm0gPSBUUlVFKS9zcXJ0KG4oKSksIAogICAgICAgICAgICB5bWluPW1lYW4tc3RkZXJyLCAKICAgICAgICAgICAgeW1heD1tZWFuK3N0ZGVyciwKICAgICAgICAgICAgY291bnRfcG9zaXRpdmU9c3VtKGlmX2Vsc2UoYmluZz09MSwgMSwgMCksIG5hLnJtID0gVFJVRSksCiAgICAgICAgICAgIGNvdW50X25lZ2F0aXZlPXN1bShpZl9lbHNlKGJpbmc9PS0xLCAxLCAwKSwgbmEucm0gPSBUUlVFKSwKICAgICAgICAgICAgY291bnRfYWxsID0gbigpLAogICAgICAgICAgICBwb3J0aW9uX3Bvc2l0aXZlID0gY291bnRfcG9zaXRpdmUgLyBjb3VudF9hbGwsCiAgICAgICAgICAgIHBvcnRpb25fbmVnYXRpdmUgPSBjb3VudF9uZWdhdGl2ZSAvIGNvdW50X2FsbCkgJT4lIAogIHNlbGVjdChzdWJyZWRkaXQsIHBvcnRpb25fcG9zaXRpdmUsIHBvcnRpb25fbmVnYXRpdmUpICU+JSAKICBwaXZvdF9sb25nZXIocG9ydGlvbl9wb3NpdGl2ZTpwb3J0aW9uX25lZ2F0aXZlLCBuYW1lc190bz0ic2VudGltZW50IiwgdmFsdWVzX3RvPSJwb3J0aW9uIikgJT4lIAogIGdncGxvdChhZXMoeD1zdWJyZWRkaXQsIHk9cG9ydGlvbiwgZmlsbD1zZW50aW1lbnQpKSArIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikgKyBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXM9YygibmF2eSIsImRlZXBwaW5rMiIpKSArCiAgeWxpbSgwLC4yKQoKCmBgYAoKCgpgYGB7ciBmaWcuaGVpZ2h0PTUsIGZpZy53aWR0aD00fQp0aWR5dGV4dF9kZiAlPiUgCiAgc2VsZWN0KHN1YnJlZGRpdCwgbmVnYXRpdmU6YW50aWNpcGF0aW9uKSAlPiUgCiAgZ3JvdXBfYnkoc3VicmVkZGl0KSAlPiUgCiAgc3VtbWFyaXplX2FsbCh+IHN1bSgueCwgbmEucm0gPSBUUlVFKS9uKCkpICU+JSAKICBwaXZvdF9sb25nZXIoY29scz1uZWdhdGl2ZTphbnRpY2lwYXRpb24sIG5hbWVzX3RvPSJzZW50aW1lbnQiLCB2YWx1ZXNfdG89InByb3BvcnRpb24iKSAlPiUgCiAgZ2dwbG90KGFlcyh4PWZjdF9yZW9yZGVyKHN1YnJlZGRpdCxwcm9wb3J0aW9uKSwgZmlsbD1zdWJyZWRkaXQsIHk9cHJvcG9ydGlvbioxMDApKSArIAogIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikgKyAKICBmYWNldF9ncmlkKGZjdF9yZW9yZGVyKHNlbnRpbWVudCwgcHJvcG9ydGlvbil+LikgKyAKICBjb29yZF9mbGlwKCkgKyBsYWJzKHg9ICJzdWJyZWRkaXQiLCB5ID0gInBlcmNlbnQgb2YgdG9rZW5zIikKYGBgCgpgYGB7ciBmaWcuaGVpZ2h0PTUsIGZpZy53aWR0aD00fQp0aWR5dGV4dF9kZiAlPiUgCiAgc2VsZWN0KHN1YnJlZGRpdCwgbmVnYXRpdmU6YW50aWNpcGF0aW9uKSAlPiUgCiAgZ3JvdXBfYnkoc3VicmVkZGl0KSAlPiUgCiAgc3VtbWFyaXplX2FsbCh+IHN1bSgueCwgbmEucm0gPSBUUlVFKS9zdW0oaWZfZWxzZSghaXMubmEoLngpLCAxLCAwKSkpICU+JSAKICBwaXZvdF9sb25nZXIoY29scz1uZWdhdGl2ZTphbnRpY2lwYXRpb24sIG5hbWVzX3RvPSJzZW50aW1l
bnQiLCB2YWx1ZXNfdG89InByb3BvcnRpb24iKSAlPiUgCiAgZ2dwbG90KGFlcyh4PWZjdF9yZW9yZGVyKHN1YnJlZGRpdCxwcm9wb3J0aW9uKSwgZmlsbD1zdWJyZWRkaXQsIHk9cHJvcG9ydGlvbioxMDApKSArIAogIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikgKyAKICBmYWNldF9ncmlkKGZjdF9yZW9yZGVyKHNlbnRpbWVudCwgcHJvcG9ydGlvbil+LikgKyAKICBjb29yZF9mbGlwKCkgKyBsYWJzKHg9ICJzdWJyZWRkaXQiLCB5ID0gInBlcmNlbnQgb2YgdG9rZW5zIiwgc3VidGl0bGU9Im5vcm1hbGl6ZSBieSBudW1iZXIgb2Ygd29yZHMgd2l0aCBhbnkgc2VudGltZW50IikKYGBgCgoKCgpgYGB7cn0KdGZpZGZfZGYgPC0gdGlkeXRleHRfZGYgJT4lIGdyb3VwX2J5KHN1YnJlZGRpdCwgd29yZCkgJT4lIGNvdW50KCkgJT4lIGJpbmRfdGZfaWRmKHdvcmQsIHN1YnJlZGRpdCwgbikKdGZpZGZfZGYgJT4lIGFycmFuZ2UoZGVzYyh0Zl9pZGYpKQpgYGAKCgoKYGBge3IgZmlnLmhlaWdodD00LCBmaWcud2lkdGg9Nn0KdGZpZGZfZGYgJT4lCiAgZ3JvdXBfYnkoc3VicmVkZGl0KSAlPiUKICBzbGljZV9tYXgodGZfaWRmLCBuID0gNDApICU+JQogIHVuZ3JvdXAoKSAlPiUKICBnZ3Bsb3QoYWVzKHRmX2lkZiwgZmN0X3Jlb3JkZXIod29yZCwgdGZfaWRmKSwgZmlsbCA9IHN1YnJlZGRpdCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh+c3VicmVkZGl0LCBuY29sID0gNCwgc2NhbGVzID0gImZyZWUiKSArCiAgbGFicyh4ID0gInRmLWlkZiIsIHkgPSBOVUxMKQpgYGAKCgpgYGB7ciBmaWcuaGVpZ2h0PTQsIGZpZy53aWR0aD02fQpucmMKCnRpZHl0ZXh0X2RmICU+JSAKICBmaWx0ZXIgJT4lIAogIHNlbGVjdChzdWJyZWRkaXQsIG5lZ2F0aXZlOmFudGljaXBhdGlvbikgJT4lIAogIGdyb3VwX2J5KHN1YnJlZGRpdCkgJT4lIAogIHN1bW1hcml6ZV9hbGwofiBzdW0oLngsIG5hLnJtID0gVFJVRSkvc3VtKGlmX2Vsc2UoIWlzLm5hKC54KSwgMSwgMCkpKSAlPiUgCiAgcGl2b3RfbG9uZ2VyKGNvbHM9bmVnYXRpdmU6YW50aWNpcGF0aW9uLCBuYW1lc190bz0ic2VudGltZW50IiwgdmFsdWVzX3RvPSJwcm9wb3J0aW9uIikKCgp0ZmlkZl9kZiAlPiUgCiAgaW5uZXJfam9pbihmaWx0ZXIobnJjLCBwb3NpdGl2ZT09MSkpICU+JSAKICBncm91cF9ieShzdWJyZWRkaXQpICU+JQogIHNsaWNlX21heCh0Zl9pZGYsIG4gPSA0MCwgd2l0aF90aWVzPUZBTFNFKSAlPiUKICB1bmdyb3VwKCkgJT4lCiAgZ2dwbG90KGFlcyh0Zl9pZGYsIGZjdF9yZW9yZGVyKHdvcmQsIHRmX2lkZiksIGZpbGwgPSBzdWJyZWRkaXQpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofnN1YnJlZGRpdCwgbmNvbCA9IDQsIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeCA9ICJ0Zi1pZGYiLCB5ID0gTlVMTCkKYGBgCgoKYGBge3J9Cgp0ZmlkZl9kZiAlPiUgCiAgaW5uZXJfam9pbihmaWx0ZXIo
bnJjLCBuZWdhdGl2ZT09MSkpICU+JSAKICBncm91cF9ieShzdWJyZWRkaXQpICU+JQogIHNsaWNlX21heCh0Zl9pZGYsIG4gPSA0MCwgd2l0aF90aWVzPUZBTFNFKSAlPiUKICB1bmdyb3VwKCkgJT4lCiAgZ2dwbG90KGFlcyh0Zl9pZGYsIGZjdF9yZW9yZGVyKHdvcmQsIHRmX2lkZiksIGZpbGwgPSBzdWJyZWRkaXQpKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGZhY2V0X3dyYXAofnN1YnJlZGRpdCwgbmNvbCA9IDQsIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeCA9ICJ0Zi1pZGYiLCB5ID0gTlVMTCkKYGBgCiAKCgoK